############################################################
#Promoting a Diverse Bench 
#by Jaclyn Kaslovsky, Albert Rivero, and Andrew Stone
#This file provides the code for all results in the Main Text
#############################################################

# Load in the necessary packages
library(xtable)
library(stargazer)
library(lubridate)
library(tidyverse)
library(dplyr)
library(estimatr)
library(rms)
library(lfe)
library(dplyr)
library(ggplot2)
library(margins)
#devtools::install_github("ChandlerLutz/starpolishr")
library(starpolishr)

# When downloading, place this code in a folder called "Code"
# and the data in a folder called "Data," both in the same enclosing folder

# Set the working directory
setwd("../")

# Load in the senator-level, CES, and press release-level datasets
load("Data/senator_level_data.RData")
load("Data/combined_ces.RData")
load("Data/gender_press_releases.RData")
load("Data/race_press_releases.RData")

####################################
# Senator-level messaging analysis #
####################################

##############################################################################
# In-text descriptive statistics related to senator-level messaging analysis #
##############################################################################
# What percentage of senators reference gender?
round(mean(senator_level_data$any_discussion_gender_mention),2)

# What percentage of senators reference race?
round(mean(senator_level_data$any_discussion_race_mention[!is.na(senator_level_data$any_discussion_race_mention)]),3)

# What percentage of senators who mention race also mention gender?
round(prop.table(table(senator_level_data$any_discussion_gender_mention[which(senator_level_data$any_discussion_race_mention==1)])),2)

# What percentage of press releases mentioning Barrett's gender are positive? 
round(mean(dat_gender$woman_positive_sentiment[dat_gender$nomination=="barrett" & !is.na(dat_gender$woman_positive_sentiment)]),2)

# What percentage of press releases mentioning Jackson's race are positive? 
round(mean(dat_race$race_positive_sentiment[dat_race$nomination=="jackson" & !is.na(dat_race$race_positive_sentiment)]),2)

# What percentage of messages about gender were positive in nature across party combinations?
# Republicans discussing outparty nominee gender only constitutes 42 messages in our data
dat_gender %>% 
  mutate(dem_nominee = ifelse(nomination %in% c("barrett","miers"), 0, 1)) %>% 
  filter(woman_reference_discussion == 1) %>% 
  mutate(woman_positive_sentiment = ifelse(is.na(woman_positive_sentiment),0,woman_positive_sentiment)) %>%
  group_by(dem_nominee, democrat) %>% 
  dplyr::summarize(woman_positive_sentiment = mean(woman_positive_sentiment), n =n())

# Proportion of positive gender mentions made by copartisans/outpartians? 
round(prop.table(table(ifelse(senator_level_data$any_positive_gender == 0, "No Positive","Positive"), ifelse(senator_level_data$copartisan_president == 1, "Copartisan","Outpartisan")), margin=1),2)

# Proportion of positive race mentions made by copartisans/outpartians? 
round(prop.table(table(ifelse(senator_level_data$any_positive_race == 0, "No Positive","Positive"), ifelse(senator_level_data$copartisan_president == 1, "Copartisan","Outpartisan")), margin=1),2)

############################################################
# Table 2: Regressions of senator-level messaging behavior #
############################################################
# Binary measure of any positive statement
either_any <- lm(any_positive_either ~ democrat + party_leader + on_sjc + freshman + female + same_race + copartisan_president + marginal_party_didnt_win*up_for_election +
                 + state + nomination, data=senator_level_data)
summary(either_any)
# Same model, but with robust SEs clustered by state
either_any_robust <- lm_robust(any_positive_either ~ democrat + party_leader + on_sjc + freshman + female + same_race + copartisan_president + marginal_party_didnt_win*up_for_election +
                 + state + nomination, data=senator_level_data, clusters = state)
summary(either_any_robust)

# Marginal effect of marginality at different levels of up for election to discuss in main text
margins1 <- margins(either_any_robust, variables="marginal_party_didnt_win",at=list(up_for_election=c(0,1)))
summary(margins1)
# Check marginal effect
lm_robust(any_positive_either ~ democrat + party_leader + on_sjc + freshman + female + same_race + copartisan_president + marginal_party_didnt_win*I(up_for_election==0) +
            + state + nomination, data=senator_level_data, clusters = state) %>% summary()

# Count measure of number of positive statement messages
either_count <- lm(number_positive_either ~ democrat + party_leader + on_sjc + freshman + female + same_race + copartisan_president + marginal_party_didnt_win*up_for_election +
                 + state + nomination, data=senator_level_data)
summary(either_count)
# Same model, but with robust SEs clustered by state
either_count_robust <- lm_robust(number_positive_either ~ democrat + party_leader + on_sjc +  freshman + female + same_race + copartisan_president + marginal_party_didnt_win*up_for_election +
                                 + state + nomination, data=senator_level_data, clusters=state)
summary(either_count_robust)

# Regressions into a latex table
main.reg.latex <- stargazer(either_any,either_count,
                            model.numbers=FALSE,
                            digits=2, dep.var.labels=c("Any Discussion","Count of Discussion"),
                            covariate.labels=c("Democrat","Party Leader","On Judiciary Committee","Freshman","Woman","Same Race","Copartisan of President","Electorally Marginal", "Up for Election","Electorally Marginal x Up for Election"),
                            star.cutoffs=c(0.10,0.05),
                            no.space=T,
                            notes="\\parbox[t]{\\textwidth}{\\footnotesize \\textit{Note}: The table shows the results from OLS regressions of senator messaging about a nominee’s descriptive traits as a function of senator characteristics. 
          Robust standard errors clustered at the state level are denoted in parentheses. $^{**}p<0.05$; $^*p<0.1$.}", 
                            omit = c("state[A-Z][A-Z]","nomination*"),
                            omit.stat = c("adj.rsq","f"),
                            title = "Determinants of Senator Messaging about Nominee Descriptive Characteristics",
                            label = "ols-senator-messaging-about-both",
                            notes.append = FALSE,notes.label = "",
                            add.lines=list(c("State Fixed Effects","\\cmark","\\cmark"),
                                           c("Nomination Fixed Effects","\\cmark","\\cmark")),
                            font.size = "small",
                            se = starprep(either_any_robust,either_count_robust),
                            p = starprep(either_any_robust,either_count_robust, stat = "p.value"),
                            table.placement = "H")

# Outputting
cat(main.reg.latex, sep = '\n', file = "Paper/Table2.tex")

# Comparing mean numbers of positive messages about gender and race by gender and race of messenger
senator_level_data %>% 
  group_by(female, minority) %>% 
  summarise(n(), mean(number_positive_gender), sum(any_positive_gender))

senator_level_data %>% 
  group_by(female, minority) %>% 
  summarise(n(), mean(number_positive_race, na.rm = T), sum(any_positive_race, na.rm = T))

# Removing model output
rm(list=setdiff(ls(), c("senator_level_data","ces_total")))

#######################################
# Cooperative Election Study analysis #
#######################################

##########################################################
# In-text descriptive statistics related to CES analysis #
##########################################################
# Binary senator approval, weighted
round(weighted.mean(ces_total$binary_senator_approval, w = ces_total$weight_cumulative, na.rm=T),2)

# Average number of positive messages about race and gender
round(mean(senator_level_data$number_positive_gender),2)
round(mean(senator_level_data$number_positive_race[!is.na(senator_level_data$number_positive_race)]),2)

#####################################################################
# Figure 1: Analysis of messaging about gender and Senator approval #
#####################################################################
######################
# Pooled respondents #
######################
# Going to run all of the regressions first, output individual tables, then combine into one figure
# Regression formula that we will use to run the models, 4pt approval outcome, respondent-level fixed effects, no instrumenting, clustered SEs by respondent
std_formula <- formula(senator_approval2 ~ binary_positive_gender*match_senator_party_leaners +
                         I(log(total_nominee_messages + 0.01)) +
                         extremity + seniority + I(seniority^2) + majority +
                         female + afam + latino + votepct + chair | respondentid | 0 | respondentid)
# Because there are no African American senators at the time of Sotomayor/Kagan who also have a votepct, we will need an alternative formula for them (and throughout) dropping afam
std_formula_alt <- formula(senator_approval2 ~ binary_positive_gender*match_senator_party_leaners +
                         I(log(total_nominee_messages + 0.01)) +
                         extremity + seniority + I(seniority^2) + majority +
                         female + latino + votepct + chair | respondentid | 0 | respondentid)

# Pooled model (all nominations)
Pooled.g <- felm(std_formula, data=ces_total)
# Effect for copartisans (to mention in main text)
round(Pooled.g$coefficients[1]+Pooled.g$coefficients[13],2)
# Check significance
felm(senator_approval2 ~ binary_positive_gender*I(match_senator_party_leaners==0) +
       I(log(total_nominee_messages + 0.01)) +
       extremity + seniority + I(seniority^2) + majority +
       female + afam + latino + votepct + chair | respondentid | 0 | respondentid, data = ces_total) %>% summary()

# Sotomayor model
Sotomayor.g <- felm(std_formula_alt, data=ces_total %>% filter(nomination == "sotomayor"))
# Kagan model
Kagan.g <- felm(std_formula_alt, data=ces_total %>% filter(nomination == "kagan"))
# Barrett model
Barrett.g <- felm(std_formula, data=ces_total %>% filter(nomination == "barrett"))
# Jackson model
Jackson.g <- felm(std_formula, data=ces_total %>% filter(nomination == "jackson"))

# Extract the coefficients from all of the model objects in the R environment (denoted by .g)
# First, list of all model objects
Listdf <- mget(ls(pattern = '\\.g'))
# Empty data frame to store results
results_full <- data.frame()

# For each model, get the coefficients and SEs for the key explanatory variables (positive mention of gender and that interacted with copartisanship)
# Then, put them all together in a dataframe
for(i in 1:length(Listdf)){
  # The ith model
  df_obj <- Listdf[[i]]
  # Get key coefficients and SEs
  coef1 <- data.frame(df_obj$coefficients[rownames(df_obj$coefficients) == "binary_positive_gender"])
  colnames(coef1)[1] <- "coef"
  se1 <- data.frame(df_obj$cse["binary_positive_gender"])
  colnames(se1)[1] <- "se"
  coef2 <- data.frame(df_obj$coefficients[rownames(df_obj$coefficients) == "binary_positive_gender:match_senator_party_leaners"])
  colnames(coef2)[1] <- "coef"
  se2 <- data.frame(df_obj$cse["binary_positive_gender:match_senator_party_leaners"])
  colnames(se2)[1] <- "se"
  # Put coefs and SEs together
  coefs <- bind_rows(coef1, coef2)
  ses <- bind_rows(se1, se2)
  results <- bind_cols(coefs, ses)
  # Name of the model (Barrett, Jackson, etc.)
  results$reg <- names(Listdf)[i]
  # Add the results to the data frame
  results_full <- bind_rows(results_full,results)
  # Remove the .g from the name
  results_full$reg <- gsub(".g","",results_full$reg,fixed = TRUE)
}

# Removing unnecessary objects
rm(coef1, se1, coef2, se2, coefs, ses, Pooled.g, Sotomayor.g, Kagan.g, Barrett.g, Jackson.g, Listdf, df_obj, i, results)

#####################
# Women respondents #
#####################
# Subset to women
woman.respondents <- ces_total[which(ces_total$woman == 1),]

# Running all of the models, same formula (model) as for all respondents above but on the women respondents only
Pooled.g <- felm(std_formula, woman.respondents)
summary(Pooled.g)

Sotomayor.g <- felm(std_formula_alt, woman.respondents %>% filter(nomination == "sotomayor"))
summary(Sotomayor.g)

Kagan.g <- felm(std_formula_alt, woman.respondents %>% filter(nomination == "kagan"))
summary(Kagan.g)

Barrett.g <- felm(std_formula, woman.respondents %>% filter(nomination == "barrett"))
summary(Barrett.g)

Jackson.g <- felm(std_formula, woman.respondents %>% filter(nomination == "jackson"))
summary(Jackson.g)

# Extract the coefficients from all of the model objects in the R environment (denoted by .g)
# First, list of all model objects
Listdf <- mget(ls(pattern = '\\.g'))
# Empty data frame to store results
results_full_women <- data.frame()

# For each model, get the coefficients and SEs for the key explanatory variables (positive mention of gender and that interacted with copartisanship)
# Then, put them all together in a dataframe
for(i in 1:length(Listdf)){
  df_obj <- Listdf[[i]]
  coef1 <- data.frame(df_obj$coefficients[rownames(df_obj$coefficients) == "binary_positive_gender"])
  colnames(coef1)[1] <- "coef"
  se1 <- data.frame(df_obj$cse["binary_positive_gender"])
  colnames(se1)[1] <- "se"
  coef2 <- data.frame(df_obj$coefficients[rownames(df_obj$coefficients) == "binary_positive_gender:match_senator_party_leaners"])
  colnames(coef2)[1] <- "coef"
  se2 <- data.frame(df_obj$cse["binary_positive_gender:match_senator_party_leaners"])
  colnames(se2)[1] <- "se"
  coefs <- bind_rows(coef1, coef2)
  ses <- bind_rows(se1, se2)
  results <- bind_cols(coefs, ses)
  results$reg <- names(Listdf)[i]
  results_full_women<- bind_rows(results_full_women,results)
  results_full_women$reg <- gsub(".g","",results_full_women$reg,fixed = TRUE)
}

# Removing unnecessary objects
rm(coef1, se1, coef2, se2, coefs, ses, Pooled.g, Sotomayor.g, Kagan.g, Barrett.g, Jackson.g, Listdf, df_obj, i, results)

###################
# Men respondents #
###################
# Subset to non-women
man.respondents <- ces_total[which(ces_total$woman == 0),]

# Running all of the models, same formula (model) as for all respondents above but on the men respondents only
Pooled.g <- felm(std_formula, man.respondents)
summary(Pooled.g)

Sotomayor.g <- felm(std_formula_alt, man.respondents %>% filter(nomination == "sotomayor"))
summary(Sotomayor.g)

Kagan.g <- felm(std_formula_alt, man.respondents %>% filter(nomination == "kagan"))
summary(Kagan.g)

Barrett.g <- felm(std_formula, man.respondents %>% filter(nomination == "barrett"))
summary(Barrett.g)

Jackson.g <- felm(std_formula, man.respondents %>% filter(nomination == "jackson"))
summary(Jackson.g)

# Extract the coefficients from all of the model objects in the R environment (denoted by .g)
# First, list of all model objects
Listdf <- mget(ls(pattern = '\\.g'))
# Empty data frame to store results
results_full_men <- data.frame()

# For each model, get the coefficients and SEs for the key explanatory variables (positive mention of gender and that interacted with copartisanship)
# Then, put them all together in a dataframe
for(i in 1:length(Listdf)){
  df_obj <- Listdf[[i]]
  coef1 <- data.frame(df_obj$coefficients[rownames(df_obj$coefficients) == "binary_positive_gender"])
  colnames(coef1)[1] <- "coef"
  se1 <- data.frame(df_obj$cse["binary_positive_gender"])
  colnames(se1)[1] <- "se"
  coef2 <- data.frame(df_obj$coefficients[rownames(df_obj$coefficients) == "binary_positive_gender:match_senator_party_leaners"])
  colnames(coef2)[1] <- "coef"
  se2 <- data.frame(df_obj$cse["binary_positive_gender:match_senator_party_leaners"])
  colnames(se2)[1] <- "se"
  coefs <- bind_rows(coef1, coef2)
  ses <- bind_rows(se1, se2)
  results <- bind_cols(coefs, ses)
  results$reg <- names(Listdf)[i]
  results_full_men<- bind_rows(results_full_men,results)
  results_full_men$reg <- gsub(".g","",results_full_men$reg,fixed = TRUE)
}

# Removing unnecessary objects
rm(coef1, se1, coef2, se2, coefs, ses, Pooled.g, Sotomayor.g, Kagan.g, Barrett.g, Jackson.g, Listdf, df_obj, i, results)

###################################################################
# Making the figure, combining all gender results into one figure #
###################################################################
# Labels to our results datasets denoting which sample of respondents they were run on
results_full$gender <- "All Respondents"
results_full_women$gender <- "Women"
results_full_men$gender <- "Men/Non-binary/Other"

# Combining all results together
results_combined <- bind_rows(results_full, results_full_women, results_full_men)

# Cleaner variable names to display
results_combined$term <- rownames(results_combined)
results_combined$term <- gsub("\\.{3}","", results_combined$term)
results_combined$term <- gsub("[0-9]+","", results_combined$term)
results_combined$term[results_combined$term=="binary_positive_gender"] <- "Any Positive Messages"
results_combined$term[results_combined$term=="binary_positive_gender:match_senator_party_leaners"] <- "Any Positive Messages x Copartisan"

# Calculate the 90 and 95 percent confidence intervals
results_combined$lb <- results_combined$coef - qnorm(.975)*results_combined$se
results_combined$ub <- results_combined$coef + qnorm(.975)*results_combined$se
results_combined$lb2 <- results_combined$coef - qnorm(.95)*results_combined$se
results_combined$ub2 <- results_combined$coef + qnorm(.95)*results_combined$se

# Factorizing variable names
results_combined$term <- as.character(results_combined$term)
results_combined$term <- as.factor(results_combined$term)

# Factorizing context name
results_combined <- results_combined %>%
  mutate(reg = fct_relevel(reg, "Jackson","Barrett","Kagan","Sotomayor","Pooled"))

# Making the figure
fig_gender <- ggplot(results_combined, aes(x = reg, y = coef, group=term, color=term)) +
  geom_point(size=2,position=position_dodge(width=0.5)) + # Coefficients
  geom_errorbar(aes(ymin = lb, ymax = ub), width = 0.1, position=position_dodge(width=0.5)) + # 95% CI
  geom_errorbar(aes(ymin = lb2, ymax = ub2), size=1, width = 0, position=position_dodge(width=0.5)) + # 90% CI
  geom_hline(yintercept = 0, lty = 2) + # Zero line
  xlab("Nominee") + theme_bw() + # Labeling and changing plot theme
  ylab("Coefficient Estimate") + # Labeling
  coord_flip() + labs(title="") + # Flipping orientation
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(legend.title=element_blank())+ facet_grid(. ~ gender)+ scale_color_manual(values=c("#999999", "mediumpurple4")) + # Faceting by gender, changing color scheme 
  theme(legend.position = "bottom") # Legend at bottom

# Outputting
ggsave("Paper/Figure1.png", fig_gender, width = 10, height = 7) 

rm(results_full,results_combined,results_full_men,results_full_women,man.respondents,woman.respondents)

###################################################################
# Figure 2: Analysis of messaging about race and Senator approval #
###################################################################
######################
# Pooled respondents #
######################
# Going to run all of the regressions first, output individual tables, then combine into one figure
# Regression formula that we will use to run the models, 4pt approval outcome, respondent-level fixed effects, no instrumenting, clustered SEs by respondent
formula_race <- formula(senator_approval2 ~ binary_positive_race*match_senator_party_leaners +
                          I(log(total_nominee_messages + 0.01)) +
                          extremity + seniority + I(seniority^2) + majority +
                          female + afam + latino + votepct + chair | respondentid | 0 | respondentid)
# Version without afam
formula_race_alt <- formula(senator_approval2 ~ binary_positive_race*match_senator_party_leaners +
                          I(log(total_nominee_messages + 0.01)) +
                          extremity + seniority + I(seniority^2) + majority +
                          female + latino + votepct + chair | respondentid | 0 | respondentid)

# Pooled model (all nominations), race messaging
Pooled.r <- felm(formula_race, data=ces_total)
summary(Pooled.r)
# Sotomayor model, race messaging
Sotomayor.r <- felm(formula_race_alt, data=ces_total %>% filter(nomination == "sotomayor"))
summary(Sotomayor.r)
# Effect for copartisans
felm(senator_approval2 ~ binary_positive_race*I(match_senator_party_leaners == 0) +
       I(log(total_nominee_messages + 0.01)) +
       extremity + seniority + I(seniority^2) + majority +
       female + latino + votepct + chair | respondentid | 0 | respondentid, data = ces_total %>% filter(nomination == "sotomayor")) %>%
  summary()
# Jackson model, race messaging
Jackson.r <- felm(formula_race, data=ces_total %>% filter(nomination == "jackson"))
summary(Jackson.r)
# Effect for copartisans
felm(senator_approval2 ~ binary_positive_race*I(match_senator_party_leaners == 0) +
       I(log(total_nominee_messages + 0.01)) +
       extremity + seniority + I(seniority^2) + majority +
       female + afam + latino + votepct + chair | respondentid | 0 | respondentid, data = ces_total %>% filter(nomination == "jackson")) %>%
  summary()

# Extract the coefficients from all of the model objects in the R environment (denoted by .r)
# First, list of all model objects
Listdf <- mget(ls(pattern = '\\.r'))
# Empty data frame to store results
results_full <- data.frame()

# For each model, get the coefficients and SEs for the key explanatory variables (positive mention of race and that interacted with copartisanship)
# Then, put them all together in a dataframe
for(i in 1:length(Listdf)){
  df_obj <- Listdf[[i]]
  coef1 <- data.frame(df_obj$coefficients[rownames(df_obj$coefficients) == "binary_positive_race"])
  colnames(coef1)[1] <- "coef"
  se1 <- data.frame(df_obj$cse["binary_positive_race"])
  colnames(se1)[1] <- "se"
  coef2 <- data.frame(df_obj$coefficients[rownames(df_obj$coefficients) == "binary_positive_race:match_senator_party_leaners"])
  colnames(coef2)[1] <- "coef"
  se2 <- data.frame(df_obj$cse["binary_positive_race:match_senator_party_leaners"])
  colnames(se2)[1] <- "se"
  coefs <- bind_rows(coef1, coef2)
  ses <- bind_rows(se1, se2)
  results <- bind_cols(coefs, ses)
  results$reg <- names(Listdf)[i]
  results_full <- bind_rows(results_full,results)
  results_full$reg <- gsub(".r","",results_full$reg,fixed = TRUE)
}

rm(coef1, se1, coef2, se2, coefs, ses, Pooled.r, Sotomayor.r, Jackson.r, Listdf)

#########################
# Same race respondents #
#########################
# Subsetting to same race respondents (Hispanic for Sotomayor, Black for Jackson)
sameracerespondents.df <- ces_total %>% filter( (hispanic == 1 & year == 2009) | (black == 1 & year == 2022) )

# Running all of the models, same formula (model) as for all respondents above but on the same race respondents only
Pooled.r <- felm(formula_race, data=sameracerespondents.df)
summary(Pooled.r)

Sotomayor.r <- felm(formula_race_alt, data=sameracerespondents.df %>% filter(nomination == "sotomayor"))
summary(Sotomayor.r)

Jackson.r <- felm(formula_race, data=sameracerespondents.df %>% filter(nomination == "jackson"))
summary(Jackson.r)

# Extract the coefficients from all of the model objects in the R environment (denoted by .r)
# First, list of all model objects
Listdf <- mget(ls(pattern = '\\.r'))
# Empty data frame to store results
results_full_samerace <- data.frame()

# For each model, get the coefficients and SEs for the key explanatory variables (positive mention of race and that interacted with copartisanship)
# Then, put them all together in a dataframe
for(i in 1:length(Listdf)){
  df_obj <- Listdf[[i]]
  coef1 <- data.frame(df_obj$coefficients[rownames(df_obj$coefficients) == "binary_positive_race"])
  colnames(coef1)[1] <- "coef"
  se1 <- data.frame(df_obj$cse["binary_positive_race"])
  colnames(se1)[1] <- "se"
  coef2 <- data.frame(df_obj$coefficients[rownames(df_obj$coefficients) == "binary_positive_race:match_senator_party_leaners"])
  colnames(coef2)[1] <- "coef"
  se2 <- data.frame(df_obj$cse["binary_positive_race:match_senator_party_leaners"])
  colnames(se2)[1] <- "se"
  coefs <- bind_rows(coef1, coef2)
  ses <- bind_rows(se1, se2)
  results <- bind_cols(coefs, ses)
  results$reg <- names(Listdf)[i]
  results_full_samerace <- bind_rows(results_full_samerace,results)
  results_full_samerace$reg <- gsub(".r","",results_full_samerace$reg,fixed = TRUE)
}

rm(coef1, se1, coef2, se2, coefs, ses, Pooled.r, Sotomayor.r, Jackson.r, Listdf)

##############################
# Different race respondents #
##############################
# Subsetting to non-same race respondents; this returns all CES respondents who aren't in our same race data frame
# Non-minority nominee years will drop out as binary_positive_race will be NA for those respondents
diffracerespondents.df <- ces_total %>% anti_join(sameracerespondents.df)

# Running all of the models, same formula (model) as for all respondents above but on the same race respondents only
Pooled.r <- felm(formula_race, data=diffracerespondents.df)
summary(Pooled.r)

Sotomayor.r <- felm(formula_race_alt, data=diffracerespondents.df %>% filter(nomination == "sotomayor"))
summary(Sotomayor.r)

Jackson.r <- felm(formula_race, data=diffracerespondents.df %>% filter(nomination == "jackson"))
summary(Jackson.r)

# Extract the coefficients from all of the model objects in the R environment (denoted by .r)
# First, list of all model objects
Listdf <- mget(ls(pattern = '\\.r'))
# Empty data frame to store results
results_full_diffrace <- data.frame()

# For each model, get the coefficients and SEs for the key explanatory variables (positiv emention of race and that interacted with copartisanship)
# Then, put them all together in a dataframe
for(i in 1:length(Listdf)){
  df_obj <- Listdf[[i]]
  coef1 <- data.frame(df_obj$coefficients[rownames(df_obj$coefficients) == "binary_positive_race"])
  colnames(coef1)[1] <- "coef"
  se1 <- data.frame(df_obj$cse["binary_positive_race"])
  colnames(se1)[1] <- "se"
  coef2 <- data.frame(df_obj$coefficients[rownames(df_obj$coefficients) == "binary_positive_race:match_senator_party_leaners"])
  colnames(coef2)[1] <- "coef"
  se2 <- data.frame(df_obj$cse["binary_positive_race:match_senator_party_leaners"])
  colnames(se2)[1] <- "se"
  coefs <- bind_rows(coef1, coef2)
  ses <- bind_rows(se1, se2)
  results <- bind_cols(coefs, ses)
  results$reg <- names(Listdf)[i]
  results_full_diffrace <- bind_rows(results_full_diffrace,results)
  results_full_diffrace$reg <- gsub(".r","",results_full_diffrace$reg,fixed = TRUE)
}

rm(coef1, se1, coef2, se2, coefs, ses, Pooled.r, Sotomayor.r, Jackson.r, Listdf)

#################################################################
# Making the figure, combining all race results into one figure #
#################################################################
# Labels to our results datasets denoting which sample of respondents they were run on
results_full$race <- "All Respondents"
results_full_samerace$race <- "Same Race Respondents"
results_full_diffrace$race <- "Different Race Respondents"

# Combining all results together
results_combined <- bind_rows(results_full, results_full_samerace, results_full_diffrace)

# Cleaner variable names to display
results_combined$term <- rownames(results_combined)
results_combined$term <- gsub("\\.{3}","", results_combined$term)
results_combined$term <- gsub("[0-9]+","", results_combined$term)
results_combined$term[results_combined$term=="binary_positive_race"] <- "Any Positive Messages"
results_combined$term[results_combined$term=="binary_positive_race:match_senator_party_leaners"] <- "Any Positive Messages x Copartisan"

# Calculate the 90 and 95 percent confidence intervals
results_combined$lb <- results_combined$coef - qnorm(.975)*results_combined$se
results_combined$ub <- results_combined$coef + qnorm(.975)*results_combined$se
results_combined$lb2 <- results_combined$coef - qnorm(.95)*results_combined$se
results_combined$ub2 <- results_combined$coef + qnorm(.95)*results_combined$se

# Factorizing variable names
results_combined$term <- as.character(results_combined$term)
results_combined$term <- as.factor(results_combined$term)

# Factorizing context name
results_combined <- results_combined %>%
  mutate(reg = fct_relevel(reg, "Jackson","Sotomayor","Pooled"))

# Making the figure
fig_race <- ggplot(results_combined, aes(x = reg, y = coef, group=term, color=term)) +
  geom_point(size=2,position=position_dodge(width=0.5)) +
  geom_errorbar(aes(ymin = lb, ymax = ub), width = 0.1, position=position_dodge(width=0.5)) +
  geom_errorbar(aes(ymin = lb2, ymax = ub2), size=1, width = 0, position=position_dodge(width=0.5)) +
  geom_hline(yintercept = 0, lty = 2) +
  xlab("Nominee") + theme_bw() + 
  ylab("Coefficient Estimate") +
  coord_flip() + labs(title="") +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(legend.title=element_blank())+ facet_grid(. ~ race)+ scale_color_manual(values=c("#999999", "mediumpurple4")) +
  theme(legend.position = "bottom")

# Outputting
ggsave("Paper/Figure2.png", fig_race, width = 10, height = 7) 

##################################################################
# Table 3: Regressions of senator-level messaging about religion #
##################################################################
# Total number of positive religion messages
sum(senator_level_data$total_pos_relig)

# Subsetting senator-level messaging data to Barrett nomination
barrett_data <- senator_level_data[which(senator_level_data$nomination == "barrett"),]
# Descriptive statistics on men/women discussion of Barrett religion
round(prop.table(table(barrett_data$any_positive_relig, barrett_data$female), margin = 1),2)
# Discussion by party
table(barrett_data$total_pos_relig, barrett_data$democrat)
sum(barrett_data$total_pos_relig[which(barrett_data$democrat == 0)])

# Running the models, Republican Senators only
# Any mention
any_pos_b <- lm(any_positive_relig ~ party_leader + on_sjc + same_religion + state_prop_catholic + state_prop_evangelical, data=senator_level_data %>% filter(nomination == "barrett", democrat == 0))
summary(any_pos_b)
# Number of mentions
count_pos_b <- lm(total_pos_relig ~ party_leader + on_sjc + same_religion + state_prop_catholic + state_prop_evangelical, data=senator_level_data %>% filter(nomination == "barrett", democrat == 0))
summary(count_pos_b)

# Regressions into a latex table
religion <- stargazer(any_pos_b,count_pos_b,
          model.numbers=FALSE,
          digits=2, dep.var.labels=c("Any Discussion","Count of Discussion"),
          covariate.labels=c("Party Leader","On Judiciary Committee","Catholic","State Proportion Catholic", "State Proportion Evangelical"),
          star.cutoffs=c(0.10,0.05),
          no.space=T,
          notes="\\parbox[t]{\\textwidth}{\\footnotesize \\textit{Note}: The table shows the results from OLS regressions of a Republican senator messaging about Amy Coney Barrett's religion as a function of senator characteristics. $^{**}p<0.05$; $^*p<0.1$.}", 
          omit = c("state[A-Z][A-Z]","nomination*"),
          omit.stat = c("adj.rsq","f"),
          title = "Determinants of Republican Senatorial Messaging about Barrett's Religion",
          label = "ols-gop-messaging-barrett",
          notes.append = FALSE,notes.label = "",
          font.size = "small",
          table.placement = "H")

# Outputting
cat(religion, sep = '\n', file = "Paper/Table3.tex")
